Introduction
This is a Notebook for visualizing dosage plot on Arabidopsis sample Gaetan. The input data is originally from bin-by-sam analysis. Details are as follow: 1. Bin-by-sam.py (credit to Meric). 3 samples input into this python script: the chromoanagenesis sample, two controls from mixture of low coverage WT Arabidopsis (in order to have a similar sequence depth with the chromoanagenesis one). We tried different bin sizes, from 2.5kb to 100kb (2.5, 5, 10, 100). 2. Plot out relative reads coverage in JMP. By using JMP, we plot out the relative reads coverage for three individuals, and confirm CNVs at the beginning part of chromoanagenesis sample. 3. Modify the .txt file for visualization by ggplot. To have a clear plot for the review paper, we decide to remake the dosage plot using ggplot. Since control1 is using as the baseline in bin-by-sam, so the relative read coverage is all 2, so we delete that samples’ information in the input file and only keep the chromoanagenesis sample and control2.
Outlines
- load libraries and data
plot 100kb all chromosomes
all.100kb <- ggplot(arabidopsis.100k, aes(Strt,Relative.Read.Coverage,color=Sample)) +
geom_point(alpha=.6) +
scale_color_manual(values = c("#999999","#E69F00")) +
theme(axis.text.x = element_text(size = 8, angle = 90), axis.title.x = element_text(size = 14, margin = margin(t=10)), axis.title.y = element_text(size = 14, margin = margin(r=10)), legend.position = "None") +
scale_x_continuous(name="Genomic Position", breaks = NULL) +
scale_y_continuous(name="Relative read \ncoverage") +
facet_wrap(~Chrom, nrow = 1, scales = "free_x")
all.100kb

save the 100kb plot
ggsave("/Users/wendy/Desktop/Lab/arabidopsis/Arabidopsis_dosage_100kb_20211103.png",width = 10, height = 2)
plot 5kb chromosomes 1
all.5kb <- ggplot(arabidopsis.5k, aes(Strt,as.numeric(Relative.Read.Coverage.5k),color=Sample)) +
geom_point(alpha=.6) +
scale_color_manual(values = c("#999999","#E69F00")) +
theme(axis.text.x = element_text(size = 8, angle = 90), axis.title.x = element_text(size = 14, margin = margin(t=10)), axis.title.y = element_text(size = 14, margin = margin(r=10)), legend.position = "None") +
scale_x_continuous(name = "Chromosome 1", breaks = NULL) +
scale_y_continuous(name = "Relative Read \nCoverage",limits = c(1,4))
all.5kb
Warning: Removed 129 rows containing missing values (geom_point).

save the 5kb plot
ggsave("/Users/wendy/Desktop/Lab/arabidopsis/plots/Arabidopsis_dosage_5kb_chr1_20211108.png",width = 15, height = 2)
Warning: Removed 129 rows containing missing values (geom_point).
plot 5kb chromosomes 1 0-1M
all.5kb.12M <- ggplot(arabidopsis.5k.12M, aes(Strt,as.numeric(Relative.Read.Coverage.5k),color=Sample)) +
geom_point(alpha=.6) +
scale_color_manual(values = c("#999999","#E69F00")) +
theme(axis.text.x = element_text(size = 8, angle = 90), axis.title.x = element_text(size = 14, margin = margin(t=10)), axis.title.y = element_text(size = 14, margin = margin(r=10)),legend.position = "bottom") +
scale_x_continuous(name = "Chromosome 1: 12Mb to 17Mb", breaks = NULL) +
scale_y_continuous(name = "Relative Read \nCoverage",limits = c(1,4))
all.5kb.12M
Warning: Removed 35 rows containing missing values (geom_point).

add potential junction data
arabidopsis.anno <- read.csv('/Users/wendy/Desktop/Lab/arabidopsis/crossread-selected-2ctrl-500bp-annotate-20211027.txt', sep = '\t')
head(arabidopsis.anno)
change the order for breakpoint1
arabidopsis.anno$Chrom1 <- factor(arabidopsis.anno$Chrom1,levels = c("Chr5","Chr4","Chr3","Chr2","Chr1"))
Two dimentional plot
scatplot <- ggplot(arabidopsis.anno, aes(Chrom1_binstart,Chrom2_binstart,alpha=Size,size="1")) +
geom_point() +
scale_color_manual(values = c('#000000')) +
scale_alpha_manual(values = c(0,0.2)) +
scale_size_manual(values = c(1.5)) +
theme(legend.position = 'none', axis.text.x = element_blank(), axis.text.y = element_blank(), axis.ticks = element_blank() ,axis.title.x = element_text(size = 14, margin = margin(t=10)), axis.title.y = element_text(size = 14,margin = margin(r=10))) +
scale_x_continuous(name="Breakpoint1") +
scale_y_continuous(name="Breakpoint2") +
facet_grid(rows = vars(arabidopsis.anno$Chrom1), cols = vars(arabidopsis.anno$Chrom2),scales = "free")
scatplot

save the scatter plot
ggsave("/Users/wendy/Desktop/Lab/arabidopsis/plots/Arabidopsis_potential_junctions_scatplot_20211103.png",height = 5, width = 5)
arragne two plots on one page
onepage <- ggarrange(all.100kb,all.5kb,all.5kb.12M,labels = c("A","B","C"), ncol = 1, nrow = 3, widths = 10, heights = c(3,3,4))
Warning: Removed 129 rows containing missing values (geom_point).
Warning: Removed 35 rows containing missing values (geom_point).
onepage

save one page three plots
ggsave("/Users/wendy/Desktop/Lab/arabidopsis/plots/Arabidopsis_dosage_all_chr1_12M_20220114.png",width = 10)
Saving 10 x 7 in image
LS0tCnRpdGxlOiAiQXJhYmlkb3BzaXNfZG9zYWdlX3Bsb3RfcG90ZW50aWFsX2p1bmN0aW9uX3Zpc3VhbGl6YXRpb24iCm91dHB1dDoKICBodG1sX25vdGVib29rOgogICAgbnVtYmVyX3NlY3Rpb25zOiB5ZXMKICAgIHRvYzogeWVzCiAgICB0b2NfZmxvYXQ6IHllcwogIGh0bWxfZG9jdW1lbnQ6CiAgICBkZl9wcmludDogcGFnZWQKICAgIHRvYzogeWVzCi0tLQoKIyBJbnRyb2R1Y3Rpb24gIwpUaGlzIGlzIGEgTm90ZWJvb2sgZm9yIHZpc3VhbGl6aW5nIGRvc2FnZSBwbG90IG9uIEFyYWJpZG9wc2lzIHNhbXBsZSBHYWV0YW4uIApUaGUgaW5wdXQgZGF0YSBpcyBvcmlnaW5hbGx5IGZyb20gYmluLWJ5LXNhbSBhbmFseXNpcy4gRGV0YWlscyBhcmUgYXMgZm9sbG93OgoxLiBCaW4tYnktc2FtLnB5IChjcmVkaXQgdG8gTWVyaWMpLiAzIHNhbXBsZXMgaW5wdXQgaW50byB0aGlzIHB5dGhvbiBzY3JpcHQ6IHRoZSBjaHJvbW9hbmFnZW5lc2lzIHNhbXBsZSwgdHdvIGNvbnRyb2xzIGZyb20gbWl4dHVyZSBvZiBsb3cgY292ZXJhZ2UgV1QgQXJhYmlkb3BzaXMgKGluIG9yZGVyIHRvIGhhdmUgYSBzaW1pbGFyIHNlcXVlbmNlIGRlcHRoIHdpdGggdGhlIGNocm9tb2FuYWdlbmVzaXMgb25lKS4gV2UgdHJpZWQgZGlmZmVyZW50IGJpbiBzaXplcywgZnJvbSAyLjVrYiB0byAxMDBrYiAoMi41LCA1LCAxMCwgMTAwKS4gCjIuIFBsb3Qgb3V0IHJlbGF0aXZlIHJlYWRzIGNvdmVyYWdlIGluIEpNUC4gQnkgdXNpbmcgSk1QLCB3ZSBwbG90IG91dCB0aGUgcmVsYXRpdmUgcmVhZHMgY292ZXJhZ2UgZm9yIHRocmVlIGluZGl2aWR1YWxzLCBhbmQgY29uZmlybSBDTlZzIGF0IHRoZSBiZWdpbm5pbmcgcGFydCBvZiBjaHJvbW9hbmFnZW5lc2lzIHNhbXBsZS4gCjMuIE1vZGlmeSB0aGUgLnR4dCBmaWxlIGZvciB2aXN1YWxpemF0aW9uIGJ5IGdncGxvdC4gVG8gaGF2ZSBhIGNsZWFyIHBsb3QgZm9yIHRoZSByZXZpZXcgcGFwZXIsIHdlIGRlY2lkZSB0byByZW1ha2UgdGhlIGRvc2FnZSBwbG90IHVzaW5nIGdncGxvdC4gU2luY2UgY29udHJvbDEgaXMgdXNpbmcgYXMgdGhlIGJhc2VsaW5lIGluIGJpbi1ieS1zYW0sIHNvIHRoZSByZWxhdGl2ZSByZWFkIGNvdmVyYWdlIGlzIGFsbCAyLCBzbyB3ZSBkZWxldGUgdGhhdCBzYW1wbGVzJyBpbmZvcm1hdGlvbiBpbiB0aGUgaW5wdXQgZmlsZSBhbmQgb25seSBrZWVwIHRoZSBjaHJvbW9hbmFnZW5lc2lzIHNhbXBsZSBhbmQgY29udHJvbDIuIAoKIyBPdXRsaW5lcyAjCjEuIGxvYWQgbGlicmFyaWVzIGFuZCBkYXRhCjIuIAoKIyBJbnB1dCBsaWJyYXJpZXMgYW5kIGRhdGEgIwojIyBhZGQgbGlicmFyaWVzICMjCmBgYHtyfQpsaWJyYXJ5KCJ0aWR5dmVyc2UiKQpsaWJyYXJ5KCJnZ3Bsb3QyIikKbGlicmFyeSgiZ2dwdWJyIikKYGBgCgojIyBhZGQgZGF0YSAjIwpgYGB7cn0KYXJhYmlkb3BzaXMuNWsgPC0gcmVhZC5jc3YoIi9Vc2Vycy93ZW5keS9EZXNrdG9wL0xhYi9hcmFiaWRvcHNpcy9kb3NhZ2VfcGxvdHRpbmdfaW5wdXRfNWtiX2NocjFfMjAyMTExMDgudHh0IixzZXAgPSAiXHQiKQpoZWFkKGFyYWJpZG9wc2lzLjVrKQpgYGAKCmBgYHtyfQphcmFiaWRvcHNpcy4xMDBrIDwtIHJlYWQuY3N2KCIvVXNlcnMvd2VuZHkvRGVza3RvcC9MYWIvYXJhYmlkb3BzaXMvZG9zYWdlX3Bsb3R0aW5nX2lucHV0XzEwMGtiXzIwMjExMTAzLnR4dCIsc2VwID0gIlx0IikKaGVhZChhcmFiaWRvcHNpcy4xMDBrKQpgYGAKCmBgYHtyfQphcmFiaWRvcHNpcy41ay4xMk0gPC0gcmVhZC5jc3YoIi9Vc2Vycy93ZW5keS9EZXNrdG9wL0xhYi9hcmFiaWRvcHNpcy9kb3NhZ2VfcGxvdHRpbmdfaW5wdXRfNWtiX2NocjFfMTJNLTIwMjIwMTE0LnR4dCIsc2VwID0gIlx0IikKaGVhZChhcmFiaWRvcHNpcy41ay4xMk0pCmBgYAoKCiMgcGxvdCAxMDBrYiBhbGwgY2hyb21vc29tZXMgIwpgYGB7cn0KYWxsLjEwMGtiIDwtIGdncGxvdChhcmFiaWRvcHNpcy4xMDBrLCBhZXMoU3RydCxSZWxhdGl2ZS5SZWFkLkNvdmVyYWdlLGNvbG9yPVNhbXBsZSkpICsKICBnZW9tX3BvaW50KGFscGhhPS42KSArCiAgc2NhbGVfY29sb3JfbWFudWFsKHZhbHVlcyA9IGMoIiM5OTk5OTkiLCIjRTY5RjAwIikpICsKICB0aGVtZShheGlzLnRleHQueCA9IGVsZW1lbnRfdGV4dChzaXplID0gOCwgYW5nbGUgPSA5MCksIGF4aXMudGl0bGUueCA9IGVsZW1lbnRfdGV4dChzaXplID0gMTQsIG1hcmdpbiA9IG1hcmdpbih0PTEwKSksIGF4aXMudGl0bGUueSA9IGVsZW1lbnRfdGV4dChzaXplID0gMTQsIG1hcmdpbiA9IG1hcmdpbihyPTEwKSksIGxlZ2VuZC5wb3NpdGlvbiA9ICJOb25lIikgKyAKICBzY2FsZV94X2NvbnRpbnVvdXMobmFtZT0iR2Vub21pYyBQb3NpdGlvbiIsIGJyZWFrcyA9IE5VTEwpICsKICBzY2FsZV95X2NvbnRpbnVvdXMobmFtZT0iUmVsYXRpdmUgcmVhZCBcbmNvdmVyYWdlIikgKwogIGZhY2V0X3dyYXAofkNocm9tLCBucm93ID0gMSwgc2NhbGVzID0gImZyZWVfeCIpCmFsbC4xMDBrYgpgYGAKCiMgc2F2ZSB0aGUgMTAwa2IgcGxvdCAjIwpgYGB7cn0KZ2dzYXZlKCIvVXNlcnMvd2VuZHkvRGVza3RvcC9MYWIvYXJhYmlkb3BzaXMvQXJhYmlkb3BzaXNfZG9zYWdlXzEwMGtiXzIwMjExMTAzLnBuZyIsd2lkdGggPSAxMCwgaGVpZ2h0ID0gMikKYGBgCgoKIyBwbG90IDVrYiBjaHJvbW9zb21lcyAxICMKYGBge3J9CmFsbC41a2IgPC0gZ2dwbG90KGFyYWJpZG9wc2lzLjVrLCBhZXMoU3RydCxhcy5udW1lcmljKFJlbGF0aXZlLlJlYWQuQ292ZXJhZ2UuNWspLGNvbG9yPVNhbXBsZSkpICsKICBnZW9tX3BvaW50KGFscGhhPS42KSArCiAgc2NhbGVfY29sb3JfbWFudWFsKHZhbHVlcyA9IGMoIiM5OTk5OTkiLCIjRTY5RjAwIikpICsKICB0aGVtZShheGlzLnRleHQueCA9IGVsZW1lbnRfdGV4dChzaXplID0gOCwgYW5nbGUgPSA5MCksIGF4aXMudGl0bGUueCA9IGVsZW1lbnRfdGV4dChzaXplID0gMTQsIG1hcmdpbiA9IG1hcmdpbih0PTEwKSksIGF4aXMudGl0bGUueSA9IGVsZW1lbnRfdGV4dChzaXplID0gMTQsIG1hcmdpbiA9IG1hcmdpbihyPTEwKSksIGxlZ2VuZC5wb3NpdGlvbiA9ICJOb25lIikgKwogIHNjYWxlX3hfY29udGludW91cyhuYW1lID0gIkNocm9tb3NvbWUgMSIsIGJyZWFrcyA9IE5VTEwpICsKICBzY2FsZV95X2NvbnRpbnVvdXMobmFtZSA9ICJSZWxhdGl2ZSBSZWFkIFxuQ292ZXJhZ2UiLGxpbWl0cyA9IGMoMSw0KSkKYWxsLjVrYgpgYGAKCiMgc2F2ZSB0aGUgNWtiIHBsb3QgIyMKYGBge3J9Cmdnc2F2ZSgiL1VzZXJzL3dlbmR5L0Rlc2t0b3AvTGFiL2FyYWJpZG9wc2lzL3Bsb3RzL0FyYWJpZG9wc2lzX2Rvc2FnZV81a2JfY2hyMV8yMDIxMTEwOC5wbmciLHdpZHRoID0gMTUsIGhlaWdodCA9IDIpCmBgYAoKCiMgcGxvdCA1a2IgY2hyb21vc29tZXMgMSAwLTFNICMKYGBge3J9CmFsbC41a2IuMTJNIDwtIGdncGxvdChhcmFiaWRvcHNpcy41ay4xMk0sIGFlcyhTdHJ0LGFzLm51bWVyaWMoUmVsYXRpdmUuUmVhZC5Db3ZlcmFnZS41ayksY29sb3I9U2FtcGxlKSkgKwogIGdlb21fcG9pbnQoYWxwaGE9LjYpICsKICBzY2FsZV9jb2xvcl9tYW51YWwodmFsdWVzID0gYygiIzk5OTk5OSIsIiNFNjlGMDAiKSkgKwogIHRoZW1lKGF4aXMudGV4dC54ID0gZWxlbWVudF90ZXh0KHNpemUgPSA4LCBhbmdsZSA9IDkwKSwgYXhpcy50aXRsZS54ID0gZWxlbWVudF90ZXh0KHNpemUgPSAxNCwgbWFyZ2luID0gbWFyZ2luKHQ9MTApKSwgYXhpcy50aXRsZS55ID0gZWxlbWVudF90ZXh0KHNpemUgPSAxNCwgbWFyZ2luID0gbWFyZ2luKHI9MTApKSxsZWdlbmQucG9zaXRpb24gPSAiYm90dG9tIikgKwogIHNjYWxlX3hfY29udGludW91cyhuYW1lID0gIkNocm9tb3NvbWUgMTogMTJNYiB0byAxN01iIiwgYnJlYWtzID0gTlVMTCkgKwogIHNjYWxlX3lfY29udGludW91cyhuYW1lID0gIlJlbGF0aXZlIFJlYWQgXG5Db3ZlcmFnZSIsbGltaXRzID0gYygxLDQpKQphbGwuNWtiLjEyTQpgYGAKCiMjIGFkZCBwb3RlbnRpYWwganVuY3Rpb24gZGF0YSAjIwpgYGB7cn0KYXJhYmlkb3BzaXMuYW5ubyA8LSByZWFkLmNzdignL1VzZXJzL3dlbmR5L0Rlc2t0b3AvTGFiL2FyYWJpZG9wc2lzL2Nyb3NzcmVhZC1zZWxlY3RlZC0yY3RybC01MDBicC1hbm5vdGF0ZS0yMDIxMTAyNy50eHQnLCBzZXAgPSAnXHQnKQpoZWFkKGFyYWJpZG9wc2lzLmFubm8pCmBgYAoKIyBjaGFuZ2UgdGhlIG9yZGVyIGZvciBicmVha3BvaW50MSAjCmBgYHtyfQphcmFiaWRvcHNpcy5hbm5vJENocm9tMSA8LSBmYWN0b3IoYXJhYmlkb3BzaXMuYW5ubyRDaHJvbTEsbGV2ZWxzID0gYygiQ2hyNSIsIkNocjQiLCJDaHIzIiwiQ2hyMiIsIkNocjEiKSkKYGBgCgojIFR3byBkaW1lbnRpb25hbCBwbG90ICMKYGBge3J9CnNjYXRwbG90IDwtIGdncGxvdChhcmFiaWRvcHNpcy5hbm5vLCBhZXMoQ2hyb20xX2JpbnN0YXJ0LENocm9tMl9iaW5zdGFydCxhbHBoYT1TaXplLHNpemU9IjEiKSkgKwogIGdlb21fcG9pbnQoKSArCiAgc2NhbGVfY29sb3JfbWFudWFsKHZhbHVlcyA9IGMoJyMwMDAwMDAnKSkgKwogIHNjYWxlX2FscGhhX21hbnVhbCh2YWx1ZXMgPSBjKDAsMC4yKSkgKwogIHNjYWxlX3NpemVfbWFudWFsKHZhbHVlcyA9IGMoMS41KSkgKwogIHRoZW1lKGxlZ2VuZC5wb3NpdGlvbiA9ICdub25lJywgYXhpcy50ZXh0LnggPSBlbGVtZW50X2JsYW5rKCksIGF4aXMudGV4dC55ID0gZWxlbWVudF9ibGFuaygpLCBheGlzLnRpY2tzID0gZWxlbWVudF9ibGFuaygpICxheGlzLnRpdGxlLnggPSBlbGVtZW50X3RleHQoc2l6ZSA9IDE0LCBtYXJnaW4gPSBtYXJnaW4odD0xMCkpLCBheGlzLnRpdGxlLnkgPSBlbGVtZW50X3RleHQoc2l6ZSA9IDE0LG1hcmdpbiA9IG1hcmdpbihyPTEwKSkpICsKICBzY2FsZV94X2NvbnRpbnVvdXMobmFtZT0iQnJlYWtwb2ludDEiKSArCiAgc2NhbGVfeV9jb250aW51b3VzKG5hbWU9IkJyZWFrcG9pbnQyIikgKyAKICBmYWNldF9ncmlkKHJvd3MgPSB2YXJzKGFyYWJpZG9wc2lzLmFubm8kQ2hyb20xKSwgY29scyA9IHZhcnMoYXJhYmlkb3BzaXMuYW5ubyRDaHJvbTIpLHNjYWxlcyA9ICJmcmVlIikKc2NhdHBsb3QKYGBgCgojIHNhdmUgdGhlIHNjYXR0ZXIgcGxvdCAjIwpgYGB7cn0KZ2dzYXZlKCIvVXNlcnMvd2VuZHkvRGVza3RvcC9MYWIvYXJhYmlkb3BzaXMvcGxvdHMvQXJhYmlkb3BzaXNfcG90ZW50aWFsX2p1bmN0aW9uc19zY2F0cGxvdF8yMDIxMTEwMy5wbmciLGhlaWdodCA9IDUsIHdpZHRoID0gNSkKYGBgCgojIGFycmFnbmUgdHdvIHBsb3RzIG9uIG9uZSBwYWdlICMKYGBge3J9Cm9uZXBhZ2UgPC0gZ2dhcnJhbmdlKGFsbC4xMDBrYixhbGwuNWtiLGFsbC41a2IuMTJNLGxhYmVscyA9IGMoIkEiLCJCIiwiQyIpLCBuY29sID0gMSwgbnJvdyA9IDMsIHdpZHRocyA9IDEwLCBoZWlnaHRzID0gYygzLDMsNCkpCm9uZXBhZ2UKYGBgCgojIHNhdmUgb25lIHBhZ2UgdGhyZWUgcGxvdHMgIwpgYGB7cn0KZ2dzYXZlKCIvVXNlcnMvd2VuZHkvRGVza3RvcC9MYWIvYXJhYmlkb3BzaXMvcGxvdHMvQXJhYmlkb3BzaXNfZG9zYWdlX2FsbF9jaHIxXzEyTV8yMDIyMDExNC5wbmciLHdpZHRoID0gMTApCmBgYAoKCgoKCg==